* this program takes the six annual data sets from OSPHD and constructs;
* in SAS a research data set.  the output has a lot more variables that'
* are eventually used in the analysis.  this data set is then transfered;
* into STATA format and we construct the analysis 

libname delivery 'k:\delivery\';
*get the total dataset;


* there are 6 annual data set. stack them on top of each other;
data delivery.total;
set delivery.evans_wei95 delivery.evans_wei96 delivery.evans_wei97 delivery.evans_wei98
delivery.evans_wei99 delivery.evans_wei00;
run;

* identify via DRGs what are childbirths in the data set;
data delivery.total;
set delivery.total;
if agedaysI<=1 and agedaysI>= -3 and disdatem >= bthdatei and _diffi=0
and (_diffm <=0 or _diffm='') and 
(drgm=370 or drgm=371 or drgm=372 or drgm=373 or drgm=374 or drgm=375) and
(drgi= 385 or drgi= 386 or drgi= 387 or drgi= 388 or drgi= 389 or drgi = 390 or drgi= 391) 
then birth=1;
else birth=0;
run;


* construct a sample that only includes births;
data delivery.birth;
set delivery.total;
if birth=1;
run;


* some of the obs do not have a birth id;
* in the code below, we infer it was a liev birth;
* from some other variables in the data set;
*check obs without birth id;
data delivery.bid0;
set delivery.total;
if _brthid='';
run;
data delivery.total;
set delivery.total;
if _brthid='' then delete;
run;

proc sql; 
create table delivery.rlni_missing as 
select rlni, count(rlni) as no_rlni
from delivery.bid0
group by rlni;
quit;
run;
proc sql; 
create table delivery.rlni_birth as 
select rlni,count(rlni) as no_rlni_b
from delivery.birth
group by rlni;
quit;
run;

proc sql;
create table delivery.rlni as 
select rlni_missing.rlni,rlni_missing.no_rlni, rlni_birth.no_rlni_b 
from delivery.rlni_missing inner join delivery.rlni_birth
on rlni_missing.rlni=rlni_birth.rlni;
quit;
run;
data delivery.rlni;
set delivery.rlni;
if no_rlni_b>3 then delete;
run;
proc sql;
create table delivery.rlni as
select rlni.*, birth._brthid
from delivery.rlni inner join delivery.birth 
on rlni.rlni=birth.rlni;
quit;
run;
data delivery.bid0;
set delivery.bid0;
drop _brthid;
run;

proc sql;
create table delivery.bid0_id as 
select bid0.*, rlni._brthid
from delivery.bid0 inner join delivery.rlni
on bid0.rlni=rlni.rlni;
quit;
run;
data delivery.total;
set delivery.total delivery.bid0_id;
run;

*83413 observation with no birthid, only birth id of 5142 out of 83413 obs could be
recovered by using rlni;
proc sort data=delivery.total;
by _brthid disdatem disdatei;
run;
*merge the birthday to the obs with same birth id;
proc sql;
create table delivery.birthi as
select distinct _brthid, bthdatei
from delivery.birth
group by _brthid;
quit;
run;


data delivery.birthi;
set delivery.birthi;
if bthdatei='.' then delete;
run;
data delivery.total;
set delivery.total;
drop bthdatei;
run;

proc sql;
create table delivery.total as
select total.*,birthi.bthdatei 
from delivery.total inner join delivery.birthi
on total._brthid=birthi._brthid;
quit;
run;



data delivery.total;
set delivery.total;
if bthdatei>0;
run;

*define readmission rate for the mother. we ended up not using;
* these variables in the analysis.;
data delivery.total;
set delivery.total;

*7, 14, 28, 180 and 360 day readmission rate;
if birth=0 and disdatem-_losm <= bthdatei +7 and disdatem-_losm >= bthdatei then 
readmission7m=1;
else readmission7m=0;

if birth=0 and disdatem-_losm <= bthdatei +14 and disdatem-_losm >= bthdatei then 
readmission14m=1;
else readmission14m=0;

if birth=0 and disdatem-_losm <= bthdatei + 28 and disdatem-_losm >= bthdatei  
then readmission28m=1;
else readmission28m=0;

if birth=0 and disdatem -_losm <= bthdatei +180 and disdatem-_losm >= bthdatei then 
readmission180m=1;
else readmission180m=0;

if birth=0 and disdatem -_losm <= bthdatei + 360 and disdatem-_losm >= bthdatei then 
readmission360m=1;
else readmission360m=0; 



* create readmission rates for the infants -- these are the key outcomes in the analysis;
if birth=0 and disdatem-_losm <= bthdatei +7 and disdatem-_losm >= bthdatei then 
readmission7m=1;
else readmission7m=0;

if birth=0 and disdatem-_losm <= bthdatei +14 and disdatem-_losm >= bthdatei then 
readmission14m=1;
else readmission14m=0;

if birth=0 and disdatem-_losm <= bthdatei + 28 and disdatem-_losm >= bthdatei  
then readmission28m=1;
else readmission28m=0;

if birth=0 and disdatem -_losm <= bthdatei +180 and disdatem-_losm >= bthdatei then 
readmission180m=1;
else readmission180m=0;

if birth=0 and disdatem -_losm <= bthdatei + 360 and disdatem-_losm >= bthdatei then 
readmission360m=1;
else readmission360m=0; 



if birth=0 and disdatem <= bthdatei then prenatal_admission=1;
else prenatal_admission=0;

* define postnatal deaths;
if dthdate<= bthdatei +7 and dthdate >= bthdatei then 
death7i=1;
else death7i=0;

if dthdate<= bthdatei +14 and dthdate >= bthdatei then 
death14i=1;
else death14i=0;

if dthdate<= bthdatei + 28 and dthdate>= bthdatei then 
death28i=1;
else death28i=0;

if dthdate<= bthdatei + 180 and dthdate>=bthdatei then 
death180i=1;
else death180i=0;

if dthdate<= bthdatei + 360 and dthdate >= bthdatei then 
death360i=1;
else death360i=0; 

run;
proc sql;
create table delivery.readmission as 
select distinct _brthid, max(readmission7m) as readmission7m, max(readmission14m) as readmission14m,
max(readmission28m) as readmission28m, max(readmission180m) as readmission180m, 
max(readmission360m) as readmission360m,
max(readmission7i) as readmission7i, max(readmission14i) as readmission14i, 
max(readmission28i) as readmission28i,max(readmission56i) as readmission56i,
max(readmission112i) as readmission112i,max(readmission180i) as readmission180i,
max(readmission360i) as readmission360i, max(prenatal_admission) as prenatal_admission,
max(death7i) as death7i, max(death14i) as death14i, max(death28i) as death28i,
max(death180i) as death180i, max(death360i) as death360i   
from delivery.total
group by _brthid;
quit;
run;


data delivery.birth;
set delivery.total;
if birth=1;

drop prenatal_admission
readmission7m
readmission14m
readmission28m
readmission180m
readmission360m
readmission7i
readmission14i
readmission28i
readmission56i
readmission112i
readmission180i
readmission360i
death7i
death14i
death28i
death180i
death360i;
run;
proc sql;
create table delivery.birth as 
select birth.*, readmission.readmission7m,
readmission.readmission14m,
readmission.readmission28m, 
readmission.readmission180m,
readmission.readmission360m,
readmission.readmission7i,
readmission.readmission14i,
readmission.readmission28i, readmission.readmission56i,
readmission.readmission112i,
readmission.readmission180i,
readmission.readmission360i,
readmission.prenatal_admission,
readmission.death7i,readmission.death14i,
readmission.death28i,readmission.death180i,
readmission.death360i
from delivery.birth inner join delivery.readmission
on birth._brthid=readmission._brthid;
quit;
run;


* here we construct a trend variable that equals 1 in the 1st month;
* (January of 1995) and 72 in the final month (December 2000);
* this is not the most efficient programming but it works;
data delivery.birth;
set delivery.birth;
IF bthdatei<= '31JAN95'D & bthdatei>= '01JAN95'D THEN TREND=1;
ELSE IF bthdatei<= '28FEB95'D & bthdatei>= '01FEB95'D THEN TREND=2;
ELSE IF bthdatei<= '31MAR95'D & bthdatei>= '01MAR95'D THEN TREND=3;
ELSE IF bthdatei<= '30APR95'D & bthdatei>= '01APR95'D THEN TREND=4;
ELSE IF bthdatei<= '31MAY95'D & bthdatei>= '01MAY95'D THEN TREND=5;
ELSE IF bthdatei<= '30JUN95'D & bthdatei>= '01JUN95'D THEN TREND=6;
ELSE IF bthdatei<= '31JUL95'D & bthdatei>= '01JUL95'D THEN TREND=7;
ELSE IF bthdatei<= '31AUG95'D & bthdatei>= '01AUG95'D THEN TREND=8;
ELSE IF bthdatei<= '30SEP95'D & bthdatei>= '01SEP95'D THEN TREND=9;
ELSE IF bthdatei<= '31OCT95'D & bthdatei>= '01OCT95'D THEN TREND=10;
ELSE IF bthdatei<= '30NOV95'D & bthdatei>= '01NOV95'D THEN TREND=11;
ELSE IF bthdatei<= '31DEC95'D & bthdatei>= '01DEC95'D THEN TREND=12;

else IF bthdatei<= '31JAN96'D & bthdatei>= '01JAN96'D THEN TREND=13;
ELSE IF bthdatei<= '29FEB96'D & bthdatei>= '01FEB96'D THEN TREND=14;
ELSE IF bthdatei<= '31MAR96'D & bthdatei>= '01MAR96'D THEN TREND=15;
ELSE IF bthdatei<= '30APR96'D & bthdatei>= '01APR96'D THEN TREND=16;
ELSE IF bthdatei<= '31MAY96'D & bthdatei>= '01MAY96'D THEN TREND=17;
ELSE IF bthdatei<= '30JUN96'D & bthdatei>= '01JUN96'D THEN TREND=18;
ELSE IF bthdatei<= '31JUL96'D & bthdatei>= '01JUL96'D THEN TREND=19;
ELSE IF bthdatei<= '31AUG96'D & bthdatei>= '01AUG96'D THEN TREND=20;
ELSE IF bthdatei<= '30SEP96'D & bthdatei>= '01SEP96'D THEN TREND=21;
ELSE IF bthdatei<= '31OCT96'D & bthdatei>= '01OCT96'D THEN TREND=22;
ELSE IF bthdatei<= '30NOV96'D & bthdatei>= '01NOV96'D THEN TREND=23;
ELSE IF bthdatei<= '31DEC96'D & bthdatei>= '01DEC96'D THEN TREND=24;

ELSE IF bthdatei<= '31JAN97'D & bthdatei>= '01JAN97'D THEN TREND=25;
ELSE IF bthdatei<= '28FEB97'D & bthdatei>= '01FEB97'D THEN TREND=26;
ELSE IF bthdatei<= '31MAR97'D & bthdatei>= '01MAR97'D THEN TREND=27;
ELSE IF bthdatei<= '30APR97'D & bthdatei>= '01APR97'D THEN TREND=28;
ELSE IF bthdatei<= '31MAY97'D & bthdatei>= '01MAY97'D THEN TREND=29;
ELSE IF bthdatei<= '30JUN97'D & bthdatei>= '01JUN97'D THEN TREND=30;
ELSE IF bthdatei<= '31JUL97'D & bthdatei>= '01JUL97'D THEN TREND=31;
ELSE IF bthdatei<= '31AUG97'D & bthdatei>= '01AUG97'D THEN TREND=32;
ELSE IF bthdatei<= '30SEP97'D & bthdatei>= '01SEP97'D THEN TREND=33;
ELSE IF bthdatei<= '31OCT97'D & bthdatei>= '01OCT97'D THEN TREND=34;
ELSE IF bthdatei<= '30NOV97'D & bthdatei>= '01NOV97'D THEN TREND=35;
ELSE IF bthdatei<= '31DEC97'D & bthdatei>= '01DEC97'D THEN TREND=36;

ELSE IF bthdatei<= '31JAN98'D & bthdatei>= '01JAN98'D THEN TREND=37;
ELSE IF bthdatei<= '28FEB98'D & bthdatei>= '01FEB98'D THEN TREND=38;
ELSE IF bthdatei<= '31MAR98'D & bthdatei>= '01MAR98'D THEN TREND=39;
ELSE IF bthdatei<= '30APR98'D & bthdatei>= '01APR98'D THEN TREND=40;
ELSE IF bthdatei<= '31MAY98'D & bthdatei>= '01MAY98'D THEN TREND=41;
ELSE IF bthdatei<= '30JUN98'D & bthdatei>= '01JUN98'D THEN TREND=42;
ELSE IF bthdatei<= '31JUL98'D & bthdatei>= '01JUL98'D THEN TREND=43;
ELSE IF bthdatei<= '31AUG98'D & bthdatei>= '01AUG98'D THEN TREND=44;
ELSE IF bthdatei<= '30SEP98'D & bthdatei>= '01SEP98'D THEN TREND=45;
ELSE IF bthdatei<= '31OCT98'D & bthdatei>= '01OCT98'D THEN TREND=46;
ELSE IF bthdatei<= '30NOV98'D & bthdatei>= '01NOV98'D THEN TREND=47;
ELSE IF bthdatei<= '31DEC98'D & bthdatei>= '01DEC98'D THEN TREND=48;

ELSE IF bthdatei<= '31JAN99'D & bthdatei>='01JAN99'D THEN TREND=49;
ELSE IF bthdatei<= '28FEB99'D & bthdatei>= '01FEB99'D THEN TREND=50;
ELSE IF bthdatei<= '31MAR99'D & bthdatei>= '01MAR99'D THEN TREND=51;
ELSE IF bthdatei<= '30APR99'D & bthdatei>= '01APR99'D THEN TREND=52;
ELSE IF bthdatei<= '31MAY99'D & bthdatei>= '01MAY99'D THEN TREND=53;
ELSE IF bthdatei<= '30JUN99'D & bthdatei>= '01JUN99'D THEN TREND=54;
ELSE IF bthdatei<= '31JUL99'D & bthdatei>= '01JUL99'D THEN TREND=55;
ELSE IF bthdatei<= '31AUG99'D & bthdatei>= '01AUG99'D THEN TREND=56;
ELSE IF bthdatei<= '30SEP99'D & bthdatei>= '01SEP99'D THEN TREND=57;
ELSE IF bthdatei<= '31OCT99'D & bthdatei>= '01OCT99'D THEN TREND=58;
ELSE IF bthdatei<= '30NOV99'D & bthdatei>= '01NOV99'D THEN TREND=59;
ELSE IF bthdatei<= '31DEC99'D & bthdatei>= '01DEC99'D THEN TREND=60;

ELSE IF bthdatei<= '31JAN00'D & bthdatei>= '01JAN00'D THEN TREND=61;
ELSE IF bthdatei<= '29FEB00'D & bthdatei>= '01FEB00'D THEN TREND=62;
ELSE IF bthdatei<= '31MAR00'D & bthdatei>= '01MAR00'D THEN TREND=63;
ELSE IF bthdatei<= '30APR00'D & bthdatei>= '01APR00'D THEN TREND=64;
ELSE IF bthdatei<= '31MAY00'D & bthdatei>= '01MAY00'D THEN TREND=65;
ELSE IF bthdatei<= '30JUN00'D & bthdatei>= '01JUN00'D THEN TREND=66;
ELSE IF bthdatei<= '31JUL00'D & bthdatei>= '01JUL00'D THEN TREND=67;
ELSE IF bthdatei<= '31AUG00'D & bthdatei>= '01AUG00'D THEN TREND=68;
ELSE IF bthdatei<= '30SEP00'D & bthdatei>= '01SEP00'D THEN TREND=69;
ELSE IF bthdatei<= '31OCT00'D & bthdatei>= '01OCT00'D THEN TREND=70;
ELSE IF bthdatei<= '30NOV00'D & bthdatei>= '01NOV00'D THEN TREND=71;
ELSE IF bthdatei<= '31DEC00'D & bthdatei>= '01DEC00'D THEN TREND=72;
RUN;


* here we construct a categorical variable that indicates the;
* payer for the delivery;

data delivery.birth;
set delivery.birth;
if paymsold= '06'or paymsold= '07'or paymsold= '08' then payer_delivery=1;
else if paymsold='02' then payer_delivery=2;
else if paymsold='01' or paymsold='03'or paymsold='04' or paymsold='05'
or paymsold='11'or paymsold='12' then payer_delivery=3;
else if paymsold='09' then payer_delivery=4;
else if paymsold= '00' or paymsold= '10' or paymsold= '99' then payer_delivery=5;
label payer_delivery='1=private insurance,2=medicaid,3=other insurance,4=no insurance,
5=unknown';
run;



* we deflate the total charges to turn them into real dollars;
* this is not done correctly here -- we really should use;
* a monthly cpi but these were not critical for the analysis;
* so we never went back and fixed it;
data delivery.birth;
set delivery.birth;
if trend>=1 and trend<=12 then do chargesm_new= _chargesm*1.2395;
chargesi_new= _chargesi*1.2395; totalcharge_new=totalcharge*1.2395; end;
else if trend>=13 and trend<=24 then do chargesm_new= _chargesm*1.204;
chargesi_new= _chargesi*1.204; totalcharge_new=totalcharge*1.204; end;
else if trend>=25 and trend<=36 then do chargesm_new= _chargesm*1.177;
chargesi_new= _chargesi*1.177; totalcharge_new=totalcharge*1.177; end;
else if trend>=37 and trend<=48 then do chargesm_new= _chargesm*1.159;
chargesi_new= _chargesi*1.159; totalcharge_new=totalcharge*1.159; end;
else if trend>=49 and trend<=60 then do chargesm_new= _chargesm*1.134;
chargesi_new= _chargesi*1.134; totalcharge_new=totalcharge*1.134; end;
else if trend>=61 and trend<=72 then do chargesm_new= _chargesm*1.097;
chargesi_new= _chargesi*1.097; totalcharge_new=totalcharge*1.097; end;
if admyrm<1995 then delete;
totalcharge_new= chargesm_new + chargesi_new;
log_chargesm=log(chargesm_new);
log_chargesi=log(chargesi_new);
log_totalcharge=log(totalcharge_new);
run;


* we construct dummy variables for when the state law was in effect only;
* (August - December 1997) and when the federal law comes into effect;
* (January 1998);
data delivery.birth;
set delivery.birth;
if trend <= 32 then statelaw=0;
else if trend > 32 and trend < 37 then statelaw=1;
else if trend >=37 then statelaw=0;
if trend <= 36 then fedlaw=0;
else if trend > 36 then fedlaw=1;
run;

* we construct these indicators here but use a different one in;
* the stata data set;
data delivery.birth;
set delivery.birth;
if _losi < 2 then less_than_twodays_baby =1;
else if _losi >=2 then less_than_twodays_baby =0;
if _losi < 4 then less_than_fourdays_baby =1;
else if _losi >=4 then less_than_fourdays_baby=0;
run;


* put moms into age groups;
data delivery.birth;
set delivery.birth;
if ageyrsm<= 20 then agegroup=20;
else if ageyrsm >20 & ageyrsm <= 25 then agegroup = 25;
else if ageyrsm > 25 & ageyrsm <= 30 then agegroup = 30 ;
else if ageyrsm > 30 & ageyrsm <= 35 then agegroup = 35 ;
else if ageyrsm > 35 & ageyrsm <= 40 then agegroup = 40 ;
else if ageyrsm > 40 then agegroup = 45 ;
label agegroup='20=less than and equal to 20, 25=from 20 to 25, 30=from 25 to 30,
35=from 30 to 35, 40= from 35 to 40, 45=above 40';
run;

* construct a new racial group indicators;
data delivery.birth;
set delivery.birth;
mrace_new= int(mrace/10);
label mrace_new='1=white,2=black,3=american indian,4=asian,5=other counties,9=unknow';
frace_new= int(frace/10);
label frace_new='1=white,2=black,3=american indian,4=asian,5=other counties,9=unknow';


* redefine children ever born;
data delivery.birth;
set delivery.birth;
if ceb=1 then previous_birth=0;
else if ceb=2 then previous_birth=1;
else if ceb=3 then previous_birth=2;
else if ceb=4 then previous_birth=3;
else if ceb>=5 then previous_birth=4;
run;


proc sql;
create table delivery.birth as
select birth.*,case_hsa.avg_case, case_hsa.deliverysize,case_hsa.hplhsa,
case_hsa.hospital_owner
from delivery.birth inner join delivery.case_hsa
on birth.hospidm=case_hsa.hospidm;
quit;
run;
data delivery.birth;
set delivery.birth;
if hisphm='1' or hisphm='2' or hisphm='3';
run;


data delivery.birth;
set delivery.birth;
bthhour=bthhour*1;
birth_hour=int(bthhour/100);
birth_min=int(bthhour)-birth_hour*100;
run; 
data delivery.birth;
set delivery.birth;
if birth_hour=99 or birth_hour=24 then delete;
run;



* keep some key variables;
data delivery.stata;
set delivery.birth;
keep log_totalcharge chargesi chargesm prevsts paymsopc nbiccl ceb paymsold admyrM paymso95M paymso95I pay_catM pay_catM pay_planM 	
readmission7i readmission14i readmission28i fedlaw 
statelaw adm90to180 payer_delivery trend hplhsa HOSPIDM cntyresm chargesi chargesm bthwght gest  pay_cati pay_cati pay_plani 
deliverysize delivery previous_birth agegroup meduc mrace_new hisphm feduc frace_new sex birth_hour disstat95i
admdaym admmnthi admyri hospital_owner _losi _losm typebth compdrg complicated drgm drgi probl_1 probl_2 probl_3;
run;

